package cn.tonyandmoney.lib.webmagic

import cn.tonyandmoney.lib.webmagic.pages.MeePageProcessor
import cn.tonyandmoney.lib.webmagic.pipeline.LawSavePipeline
import us.codecraft.webmagic.Spider
import us.codecraft.webmagic.downloader.selenium.SeleniumDownloader

/**
 * Created by niantuo on 2019/2/14.
 */

/**
 * Absolute path to the ChromeDriver executable; handed to the `SeleniumDownloader`
 * constructor in [main]. Machine-specific (Windows install location).
 * The identifier's spelling is kept as-is because it is a public name.
 */
const val CHROME_DRiVAR: String =
    "C:\\Program Files (x86)\\Google\\Chrome\\chromedriver_win32\\chromedriver.exe"

/**
 * Absolute path to the Selenium configuration file; [main] publishes it through the
 * `selenuim_config` JVM system property before the downloader is created.
 * NOTE(review): the property key spelling presumably has to match what the
 * webmagic-selenium extension reads — confirm before "correcting" it.
 */
const val selenuim_config: String = "L:\\WebMagic\\config.ini"

/**
 * Entry point: crawls the law listing on mee.gov.cn and prints what was collected.
 *
 * Steps, in order:
 * 1. Publishes the Selenium config file path via the `selenuim_config` system property.
 * 2. Builds a [MeePageProcessor] configured with a listing-page regex and an article-page regex.
 * 3. Runs a single-threaded [Spider] seeded with the listing index URL, downloading through
 *    a [SeleniumDownloader] that uses the ChromeDriver at [CHROME_DRiVAR].
 * 4. Prints the pipeline's `total` followed by every entry of its `articles`.
 *
 * @param args command-line arguments; not used.
 */
fun main(args: Array<String>) {
    // URL patterns: which links count as article pages and which as listing (index) pages.
    val articlePattern = "^(http://)(zfs|www)*(\\.mee\\.gov\\.cn)(/\\w+)+(\\.shtml)"
    val listingPattern = "http://www\\.mee\\.gov\\.cn/gzfw_13107/zcfg/(fl)*(.)*"

    // Must be set before the Selenium downloader is constructed below.
    System.setProperty("selenuim_config", selenuim_config)

    val savePipeline = LawSavePipeline()

    val pageProcessor = MeePageProcessor()
            .setPageRegex(listingPattern)
            .setArticleRegex(articlePattern)

    val spider = Spider.create(pageProcessor)
            .addUrl("http://www.mee.gov.cn/gzfw_13107/zcfg/fl/index.shtml")
            .addPipeline(savePipeline)
            .setDownloader(SeleniumDownloader(CHROME_DRiVAR))
            .thread(1)

    // The results are read from the pipeline only after run() has returned.
    spider.run()

    println("total=>" + savePipeline.total)

    savePipeline.articles.forEach { article ->
        println("\n$article")
    }
}